set.seed(02143)
library(data.table)
library(vdemdata)
library(lfe)

source('split_samp_mcseq_funs.R')

# Start from the full V-Dem country-year panel.
df <- vdem

# Columns carried into every shifted copy; the first two are the merge keys.
panel_cols <- c('country_name', 'year', 'e_pelifeex', 'e_civil_war', 'e_miinteco', 'e_miinterc', 'e_pt_coup', 'e_migdppc')

# Copy `panel`, move its `year` by `offset`, and append `suffix` to every
# non-key column so the copy can sit beside the unshifted data after a merge.
shift_panel <- function(panel, offset, suffix) {
  shifted <- panel
  shifted$year <- shifted$year + offset
  is_key <- seq_along(colnames(shifted)) <= 2
  colnames(shifted)[!is_key] <- paste0(colnames(shifted)[!is_key], suffix)
  shifted
}

core <- df[, panel_cols]

# A copy whose year is t + k lines up with row t + k of df, i.e. it supplies
# the value observed k years earlier.
lag1 <- shift_panel(core, 1, 'past')
lag2 <- shift_panel(core, 2, 'past2')
lag3 <- shift_panel(core, 3, 'past3')
lag4 <- shift_panel(core, 4, 'past4')
lag5 <- shift_panel(core, 5, 'past5')

# Offset 0 on purpose: the original code derived this year as (year + 1) - 1,
# so the '*future' columns hold SAME-year values.
# NOTE(review): if a genuine one-year lead was intended the offset should be
# -1; that would change `onset` below, so confirm before altering it.
fut1 <- shift_panel(core, 0, 'future')

# merge() joins on the shared columns (country_name, year) and keeps only
# matching rows, so country-years lacking any of the five lags drop out here.
df <- Reduce(merge, list(fut1, lag1, lag2, lag3, lag4, lag5), df)

# Onset flag: the '*future' war indicator is 1 while the previous year's is 0.
df$onset <- df$e_civil_warfuture == 1 & df$e_civil_warpast == 0

# Keep only rows where the flag could be determined.
df <- df[!is.na(df$onset), ]

# Exploratory persistence checks; every summary is printed and nothing is stored.
# felm formula parts are `outcome ~ regressors | absorbed factors | IV | cluster`.
# Here: current-year civil war on one, two and three of its own lags, with
# country and year absorbed as fixed effects.
summary(felm(e_civil_war~e_civil_warpast|country_name + year,df))
summary(felm(e_civil_war~e_civil_warpast+ e_civil_warpast2|country_name + year,df))
summary(felm(e_civil_war~e_civil_warpast+ + e_civil_warpast2 + e_civil_warpast3|country_name + year,df))

# Four lags without a cluster part, the same model with country_name in the
# cluster part (third part 0 = no IV), and a five-lag version of the latter.
summary(felm(e_civil_war~e_civil_warpast+ e_civil_warpast2 + e_civil_warpast3 + e_civil_warpast4|country_name + year,df))
summary(felm(e_civil_war~e_civil_warpast+ e_civil_warpast2 + e_civil_warpast3 + e_civil_warpast4|country_name + year|0|country_name,df))
summary(felm(e_civil_war~e_civil_warpast+ + e_civil_warpast2 + e_civil_warpast3 + e_civil_warpast4+ e_civil_warpast5|country_name + year|0|country_name,df))


# Add v2x_polyarchy to the four-lag specification: first with `onset` as the
# outcome (cluster part is 0), then with e_civil_war and country_name clustering.
summary(felm(onset~ v2x_polyarchy + e_civil_warpast+ e_civil_warpast2 + e_civil_warpast3 + e_civil_warpast4|country_name + year|0|0,df))
summary(felm(e_civil_war~ v2x_polyarchy + e_civil_warpast+ e_civil_warpast2 + e_civil_warpast3 + e_civil_warpast4|country_name + year|0|country_name,df))


# Remove auxiliary and sparsely observed columns from a matrix or data frame.
#
# Two filters are applied in order:
#   1. drop every column whose name contains one of the markers `_codehigh`,
#      `_codelow`, `_ord`, `_osp`, `_sd`, `_mean` or `_nr` (matched anywhere in
#      the name, not only as a suffix; these look like V-Dem's companion
#      versions of a base variable -- see the codebook);
#   2. drop every remaining column whose share of NA values is not below
#      `max_missing`.
#
# X            matrix or data frame with one column per variable.
# max_missing  columns are kept only if their NA share is strictly below this
#              value (default .1, the cutoff that used to be hard-coded).
#
# Returns an object of the same kind as X. It always keeps two dimensions,
# even when a single column (or none) survives.
drop_high_missingness <- function(X, max_missing = .1){
  marker_pattern <- '_codehigh|_codelow|_ord|_osp|_sd|_mean|_nr'

  col_names <- colnames(X)
  if (is.null(col_names)) {
    # Without names there is nothing to match; keep every column for step 2
    # instead of indexing with a zero-length mask and losing them all.
    is_auxiliary <- rep(FALSE, ncol(X))
  } else {
    is_auxiliary <- grepl(marker_pattern, col_names)
  }

  # drop = FALSE: a lone surviving column must stay a column, not a vector.
  X <- X[, !is_auxiliary, drop = FALSE]
  X[, colMeans(is.na(X)) < max_missing, drop = FALSE]
}

# Run the split-sample estimator once per value of `interval` and collect the
# point estimates with 95% normal-approximation bounds.
#
# The outcome (`onset`), cluster id (`country_name`), `year` and lagged GDP
# (`e_migdppcpast`) are read from the global `df`, so X must have one row per
# row of `df`, in the same order.
#
# X          treatment matrix (a data frame or plain vector is also accepted).
#            Rows with any NA in X, or with a missing outcome, are dropped
#            before estimation. Missing GDP is NOT filtered.
# interval   values passed one at a time as `q` to est_mcseq_2_samp_wrapped().
# mcseq_fun  currently unused; kept so existing calls keep working.
#
# Returns a data frame with columns est, lower, upper, q and controls
# (always 'FE'), one row per element of `interval`.
#
# NOTE(review): est_mcseq_2_samp_wrapped() is defined outside this file. It is
# assumed to return a list whose first element is a scalar estimate and whose
# second is its standard error, because that is how the values are used below
# -- confirm against split_samp_mcseq_funs.R.
get_ests <- function(X, interval=seq(0,.5, by=.01), mcseq_fun=get_mcseq_ls_treat){
  # A bare vector has no row dimension to subset; treat it as one column.
  if (is.null(dim(X))) {
    X <- as.matrix(X)
  }
  # Guard against silent misalignment between treatments and outcomes.
  stopifnot(nrow(X) == nrow(df))

  y <- df$onset
  keep <- complete.cases(X) & !is.na(y)

  y <- y[keep]
  country <- df$country_name[keep]
  covs <- data.frame(country = country,
                     year = df$year[keep],
                     gdp = df$e_migdppcpast[keep])
  # drop = FALSE keeps a single-column X as a matrix for the estimator.
  X <- X[keep, , drop = FALSE]

  n_q <- length(interval)
  est <- numeric(n_q)
  se <- numeric(n_q)
  # Sequential on purpose: the estimator draws random numbers, so the call
  # order determines reproducibility under a fixed seed.
  for (i in seq_len(n_q)) {
    mod <- est_mcseq_2_samp_wrapped(treat_types=X, y=y, clust=country, covars=covs, q=interval[i], nsims=10, nboot=50, est_fun=est_treat_mcseq_ls, form='|country + year', wraps=5)
    est[i] <- mod[[1]]
    se[i] <- mod[[2]]
  }

  data.frame(est = est,
             lower = est - 1.96 * se,
             upper = est + 1.96 * se,
             q = interval,
             controls = rep('FE', n_q))
}

# Grid of q values handed to get_ests() for every specification below.
int <- seq(0.05, .5, by=.05)

# NOTE(review): the original ran
#   summary(felm(onset~indicators|e_migdppcpast,df))
# at this point. Nothing visible in this script defines `indicators` (check
# split_samp_mcseq_funs.R), so a non-interactive run would stop here with
# "object 'indicators' not found" before any estimate is produced. The call is
# disabled; restore it with the intended regressor if it is still wanted.

# Names of df columns starting with any of `prefixes`, grouped by prefix in the
# order given so the column order of the design matrix stays stable.
cols_with_prefix <- function(prefixes) {
  unlist(lapply(prefixes, function(p) colnames(df)[startsWith(colnames(df), p)]))
}

democ_vars <- unique(c(cols_with_prefix(c('v2ps', 'v2el')),
                       'v2x_polyarchy', 'v2x_partipdem', 'v2x_suffr',
                       'v2asuffrage'))

formal_inst_vars <- unique(cols_with_prefix(c('v2ex', 'v2lg', 'v2ju', 'v2st', 'v2sv')))
formal_inst_vars <- formal_inst_vars[formal_inst_vars != 'v2svstterr']# & formal_inst_vars!= 'v2svinlaut' & !grepl('v2exrmhsol', formal_inst_vars) & !grepl('v2exctlhs',formal_inst_vars)]

# The third prefix used to be tested as substr(name, 1, 4) == 'vme'. A
# four-character substring can never equal a three-character string, so that
# test selected nothing. 'v2me' is the only V-Dem prefix it can have meant.
# NOTE(review): this adds the v2me* columns to the design matrix and will
# change the estimates relative to earlier runs.
# (This vector was also assigned twice with identical code; once is enough.)
rights_and_equality_vars <- unique(c(cols_with_prefix(c('v2cl', 'v2cs', 'v2me')),
                                     'v2caassemb', 'v2x_libdem'))

#rights_and_equality_vars <- rights_and_equality_vars[!rights_and_equality_vars %in% c('v2clgencl', 'v2clgeocl')]
rights_and_equality_vars <- rights_and_equality_vars[!grepl('csan', rights_and_equality_vars)]

# Not used in the estimation below; kept because it is defined at top level.
exclusion_vars <- c(cols_with_prefix(c('v2pea', 'v2elrst', 'v2pepwr')),
                    'v2x_egaldem', 'v2xeg_eqprotec', 'v2xeg_eqdr')


# Design matrix: every selected numeric column that survives the
# missingness filter.
all_vars <- c(democ_vars, formal_inst_vars, rights_and_equality_vars)
all <- df[, all_vars]
all <- as.matrix(all[, vapply(all, is.numeric, logical(1)), drop = FALSE])
all <- drop_high_missingness(all)


all_ests <- get_ests(X=all, interval=int)
all_ests
all_ests$institutions <- 'all'
# `interval` spelled out; the original `int=` only worked through partial
# argument matching.
poly_ests <- get_ests(X=cbind(1,df$v2x_polyarchy), interval=int)
poly_ests$institutions <- 'polyarchy'

ests <- rbind(all_ests, poly_ests)
# NOTE(review): drop_high_missingness() in this file keeps columns with an NA
# share below .1, yet .05 is recorded here. The value is left as written
# because downstream code may key on it; reconcile the two.
ests$na_thresh <- .05

fwrite(ests, 'Intermediate Files/democ_ests_neur_ips.csv')
